load libraries

library(tidyverse)
## -- Attaching packages ------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

load files

# Read the tab-separated genotype file; its first row supplies the column names.
SNPs <- read.table(
  file = "data/23andMe_complete.txt",
  sep = "\t",
  header = TRUE
)
# Preview the first ten rows.
head(SNPs, 10)
##          rsid chromosome position genotype
## 1   rs4477212          1    82154       AA
## 2   rs3094315          1   752566       AA
## 3   rs3131972          1   752721       GG
## 4  rs12124819          1   776546       AG
## 5  rs11240777          1   798959       AG
## 6   rs6681049          1   800007       CC
## 7   rs4970383          1   838555       AC
## 8   rs4475691          1   846808       CT
## 9   rs7537756          1   854250       AG
## 10 rs13302982          1   861808       GG

Adjusting the figure size

# Make chromosome an ordered factor (1-22, then X, Y, MT) so the legend and
# fill colours follow this order.
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)
# Polar bar chart: one wedge per genotype, filled by chromosome.
ggplot(SNPs, aes(x = genotype, fill = chromosome)) +
  geom_bar() +
  coord_polar() +
  labs(
    title = "Total SNPs for each genotype",
    x = "Genotype",
    y = "Total number of SNPs"
  )

graphic output

# Fix the chromosome order as 1-22, X, Y, MT (ordered factor).
SNPs[["chromosome"]] <- ordered(
  SNPs[["chromosome"]],
  levels = c(as.character(1:22), "X", "Y", "MT")
)
# Genotype counts drawn as a bar chart in polar coordinates, with each bar
# split by chromosome.
ggplot(data = SNPs, mapping = aes(x = genotype, fill = chromosome)) +
  geom_bar() +
  coord_polar() +
  labs(title = "Total SNPs for each genotype") +
  labs(y = "Total number of SNPs", x = "Genotype")

# Write the per-chromosome bar chart (bars filled by genotype) to a 6 x 3 inch
# PDF. The plot is print()ed explicitly: a bare ggplot expression is only drawn
# by top-level auto-printing, so nothing would reach the device if this code
# were run through source() or from inside a function or loop.
pdf("SNP_example_plot.pdf", width = 6, height = 3)
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
dev.off()
## png 
##   2

plot to PNG

# Write the per-chromosome bar chart (bars filled by genotype) to a PNG that is
# 6 x 6 inches at `ppi` pixels per inch (width and height are given in pixels).
ppi <- 300
png("SNP_example_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# Explicit print(): a bare ggplot expression is only rendered by top-level
# auto-printing, which does not happen under source() or inside functions.
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
dev.off()
## png 
##   2

load file

interactive graphs

# Install plotly only when it is missing, so re-running or re-knitting this
# document does not reinstall the package (or require network access) each time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatter plot of iris sepal length vs. sepal width, coloured by species,
# converted to an interactive plotly widget.
p <- ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point()
ggplotly(p)

# Same plot, built inline instead of via an intermediate object
# (plotly is already attached earlier in the file, so it is not loaded again).
ggplotly(
  ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
    geom_point()
)

# Install DT only when it is missing, to avoid reinstalling on every run.
if (!requireNamespace("DT", quietly = TRUE)) {
  install.packages("DT")
}
library(DT)
# Interactive, searchable HTML table of the iris data.
datatable(iris)

Exercise 1

Add a title and labels for the x and y axes to the plot from Lab 3, Exercise 1. Color the bars blue.

# Count the SNP rows for each chromosome level (summary() on a factor returns
# per-level counts) and show the result as a one-column data frame. The column
# name is derived from the expression text itself.
data.frame(summary(SNPs$chromosome))
##    summary.SNPs.chromosome.
## 1                     76909
## 2                     77346
## 3                     63285
## 4                     55017
## 5                     56019
## 6                     63245
## 7                     50965
## 8                     49215
## 9                     42969
## 10                    50322
## 11                    47972
## 12                    47125
## 13                    36078
## 14                    30818
## 15                    28400
## 16                    30167
## 17                    26688
## 18                    27971
## 19                    18533
## 20                    23834
## 21                    13404
## 22                    14100
## X                     26007
## Y                      1766
## MT                     2459
# Per-chromosome SNP totals, entered by hand from the counts printed above.
# Both columns are stored as character vectors (see the str() output below).
df <- data.frame(
  chromosome = c(as.character(1:22), "X", "Y", "MT"),
  SNPs = c(
    "76909", "77346", "63285", "55017", "56019",
    "63245", "50965", "49215", "42969", "50322",
    "47972", "47125", "36078", "30818", "28400",
    "30167", "26688", "27971", "18533", "23834",
    "13404", "14100", "26007", "1766", "2459"
  ),
  stringsAsFactors = FALSE
)
summary(df)
##   chromosome            SNPs          
##  Length:25          Length:25         
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
str(df)
## 'data.frame':    25 obs. of  2 variables:
##  $ chromosome: chr  "1" "2" "3" "4" ...
##  $ SNPs      : chr  "76909" "77346" "63285" "55017" ...
# `df` stores both columns as character strings. Plotted as-is, the counts are
# treated as discrete labels (so bar heights are not proportional to the
# counts) and the chromosomes are arranged alphabetically. Convert the counts
# to numbers and pin the chromosome order to the order of the rows in `df`.
plot_df <- data.frame(
  chromosome = factor(df$chromosome, levels = df$chromosome),
  SNPs = as.numeric(df$SNPs)
)

# Blue bar chart of the SNP total for each chromosome.
ab <- ggplot(data = plot_df) +
  geom_col(mapping = aes(x = chromosome, y = SNPs), fill = "blue")
ab

# Add the title and the x/y axis labels requested by the exercise.
ab +
  ggtitle("SNPs in the human genome") +
  xlab("Chromosome") +
  ylab("Total number of SNPs")

Exercise 2

# Manual fill colours, one entry per genotype code.
mycolour <- c(
  # two different bases: brown
  "AC" = "BROWN", "AG" = "BROWN", "AT" = "BROWN",
  "CG" = "BROWN", "CT" = "BROWN", "GT" = "BROWN",
  # two identical bases: blue
  "AA" = "BLUE", "CC" = "BLUE", "GG" = "BLUE", "TT" = "BLUE",
  # single base: green
  "A" = "GREEN", "C" = "GREEN", "G" = "GREEN", "T" = "GREEN",
  # D / I codes (presumably deletion / insertion calls): magenta
  "D" = "MAGENTA", "DD" = "MAGENTA", "DI" = "MAGENTA",
  "I" = "MAGENTA", "II" = "MAGENTA",
  # "--" (presumably a no-call): grey
  "--" = "#999999"
)

# Keep chromosomes in the order 1-22, X, Y, MT.
SNPs$chromosome <- ordered(SNPs$chromosome, levels = c(seq(1, 22), "X", "Y", "MT"))

# Stacked bar chart of SNP counts per chromosome, filled by genotype.
# The aesthetic refers to the bare column name rather than `SNPs$chromosome`:
# using `$` inside aes() bypasses the plot's data, which breaks as soon as the
# plot is faceted or its data is replaced.
p <- ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(color = "black") +
  ggtitle("Number of SNPs in the human genome") +
  ylab("total number of SNPs") +
  xlab("chromosome number")
p + scale_fill_manual(values = mycolour)

Exercise 3

Genotype counts per chromosome

Genotype counts per chromosome

Exercise 4

# Fix the chromosome order as 1-22, X, Y, MT (ordered factor).
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)

# One panel per genotype, stacked in a single column; within each panel, one
# black-outlined bar per chromosome, coloured by chromosome.
ad <- ggplot(SNPs, aes(x = chromosome, fill = chromosome)) +
  geom_bar(position = "dodge", color = "black") +
  facet_wrap(~ genotype, ncol = 1)

# Enlarge the x-axis title and the y-axis tick labels.
large_text <- element_text(size = 20)
ae <- ad + theme(axis.title.x = large_text, axis.text.y = large_text)

ae + labs(title = "Genotype count per chromosome")

Exercise 5

library(plotly)
# Interactive version of the per-chromosome genotype counts: one facet per
# genotype, laid out in two columns.
ggplotly(
  ggplot(data = SNPs, mapping = aes(x = chromosome, fill = genotype)) +
    geom_bar(position = "dodge") +
    facet_wrap(~ genotype, ncol = 2) +
    labs(
      title = "genotypes across chromosome",
      x = "chromosome",
      y = "genotype counts"
    )
)

Exercise 6

# Keep only the rows whose chromosome is "Y"; which() drops any NA comparisons,
# matching what subset() does, and the original row names are retained.
Chromosome_Y <- SNPs[which(SNPs$chromosome == "Y"), ]
# Preview the first ten Y-chromosome rows.
head(Chromosome_Y, 10)
##              rsid chromosome position genotype
## 956390   i4000095          Y  2649694        T
## 956391 rs11575897          Y  2655180        G
## 956392  rs2534636          Y  2657176        C
## 956393   i3000043          Y  2658271        G
## 956394   i3000045          Y  2658869        G
## 956395   i4000162          Y  2663707        T
## 956396 rs13303871          Y  2679100        G
## 956397 rs35284970          Y  2734854        C
## 956398   i4000052          Y  2740274        T
## 956399     rs3895          Y  2744628        T